{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import xgboost as xgb\n",
    "import sys,random\n",
    "import pickle\n",
    "import os\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "if not os.path.exists('unlabel_preds'):\n",
    "    os.mkdir('unlabel_preds')\n",
    "if not os.path.exists('unlabel_model'):\n",
    "    os.mkdir('unlabel_model')\n",
    "if not os.path.exists('unlabel_featurescore'):\n",
    "    os.mkdir('unlabel_featurescore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_x_d_cols = pd.read_csv('./rank_d_feature_score.csv')\n",
    "train_x_d_cols = list(train_x_d_cols.iloc[10:810].feature)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 17 s, sys: 708 ms, total: 17.7 s\n",
      "Wall time: 20.9 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "#load data\n",
    "train_x_date = pd.read_csv('../../preprocess_data/train_x_date.csv').drop(columns=['id'])\n",
    "train_x_null = pd.read_csv('../../preprocess_data/train_x_null.csv').drop(columns=['id'])\n",
    "train_x_int = pd.read_csv('../../preprocess_data/train_x_int_filter.csv').drop(columns=['id']) \n",
    "train_x_d = pd.read_csv('../../preprocess_data/train_x_float_rank_d.csv',usecols=train_x_d_cols)\n",
    "train_x_nd = pd.read_csv('../../preprocess_data/train_x_float_rank_nd.csv').drop(columns=['id'])\n",
    "train_x = pd.concat([train_x_date,train_x_null,train_x_int,train_x_d,train_x_nd],axis=1,ignore_index=True,copy=False)\n",
    "\n",
    "train_cols = list(train_x_date.columns) +\\\n",
    "            list(train_x_null.columns) +\\\n",
    "            list(train_x_int.columns) +\\\n",
    "            list(train_x_d.columns) + \\\n",
    "            list(train_x_nd.columns)\n",
    "train_x.columns = train_cols\n",
    "\n",
    "# 剩余的6w多条数据本来是无标签的，但是我们在数据预处理的时候对其进行了打标\n",
    "unlabel_date = pd.read_csv('../../preprocess_data/unlabel_x_date.csv')\n",
    "unlabel_null = pd.read_csv('../../preprocess_data/unlabel_x_null.csv').drop(columns=['id'])\n",
    "unlabel_int = pd.read_csv('../../preprocess_data/unlabel_x_int_filter.csv').drop(columns=['id'])\n",
    "unlabel_d = pd.read_csv('../../preprocess_data/unlabel_x_float_rank_d.csv',usecols=train_x_d_cols)\n",
    "unlabel_nd = pd.read_csv('../../preprocess_data/unlabel_x_float_rank_nd.csv').drop(columns=['id'])\n",
    "unlabel = pd.concat([unlabel_date,unlabel_null,unlabel_int,unlabel_d,unlabel_nd],axis=1,ignore_index=True,copy=False)\n",
    "unlabel_cols = list(unlabel_date.columns) + \\\n",
    "            list(unlabel_null.columns) + \\\n",
    "            list(unlabel_int.columns) + \\\n",
    "            list(unlabel_d.columns) + \\\n",
    "            list(unlabel_nd.columns)\n",
    "unlabel.columns = unlabel_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "正负样本不均衡 {0.0: 37921, 1.0: 5525}\n"
     ]
    }
   ],
   "source": [
    "# 把已打标的数据加入训练集\n",
    "unlabel_y = pd.read_csv('unlabel_y.csv')\n",
    "unlabel_merge = pd.merge(unlabel_y, unlabel, how = 'inner', on='id', copy=False)\n",
    "unlabel_merge = unlabel_merge.drop(columns=['id','label'])\n",
    "\n",
    "# train_ay\n",
    "train_y = pd.read_csv('../../preprocess_data/train_y_33465.csv')\n",
    "unlabel_y = unlabel_y[['label']]\n",
    "train_ay = pd.concat([train_y,unlabel_y],axis=0,ignore_index=True,copy=False)\n",
    "# train_ax\n",
    "train_ax = pd.concat([train_x,unlabel_merge],axis=0,ignore_index=True,copy=False)\n",
    "dtrain = xgb.DMatrix(train_ax.values, feature_names=train_ax.columns, label=train_ay.values)\n",
    "print('正负样本不均衡',train_ay.label.value_counts().to_dict())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "valid_date = pd.read_csv('../../preprocess_data/valid_date.csv')\n",
    "valid_null = pd.read_csv('../../preprocess_data/valid_null.csv').drop(columns=['id'])\n",
    "valid_int = pd.read_csv('../../preprocess_data/valid_int_filter.csv').drop(columns=['id'])\n",
    "valid_d = pd.read_csv('../../preprocess_data/valid_float_rank_d.csv',usecols=train_x_d_cols)\n",
    "valid_nd = pd.read_csv('../../preprocess_data/valid_float_rank_nd.csv').drop(columns=['id'])\n",
    "valid = pd.concat([valid_date,valid_null,valid_int,valid_d,valid_nd],axis=1,ignore_index=True,copy=False)\n",
    "valid_cols = list(valid_date.columns) + \\\n",
    "            list(valid_null.columns) + \\\n",
    "            list(valid_int.columns) + \\\n",
    "            list(valid_d.columns) + \\\n",
    "            list(valid_nd.columns)\n",
    "valid.columns = valid_cols\n",
    "# pd.concat后特征名没有了，变成数字了\n",
    "valid_id = valid.id\n",
    "valid = valid.drop(columns=['id'])\n",
    "dvalid = xgb.DMatrix(valid.values, feature_names=valid.columns)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 本地交叉验证"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "params={\n",
    "    'booster':'gbtree',\n",
    "    'objective': 'binary:logistic',\n",
    "    'early_stopping_rounds':100,\n",
    "    'scale_pos_weight': float(len(train_ay)-np.sum(train_ay.values))/float(np.sum(train_ay.values)),  # 负例样本除以正例样本\n",
    "    'eval_metric': 'auc',\n",
    "    'gamma':1,\n",
    "    'max_depth':6,\n",
    "    'lambda':1,\n",
    "    'subsample':0.9,\n",
    "    'colsample_bytree':0.9,\n",
    "    'min_child_weight':1, \n",
    "    'eta': 0.04,\n",
    "    'seed':1080,\n",
    "    'nthread':16\n",
    "        }\n",
    "result = xgb.cv(params,dtrain,num_boost_round=800,nfold=5,shuffle=True,verbose_eval=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f3083c79e10>"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD8CAYAAAB3u9PLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xl8VPW9+P/XO5ONhLBkAQNhVZQthiUgSnEpFYMLIPp1qVbpIlWr3mstFb9ytdV6ta2/uvSq96JFxRW+qJWrUHGB69W6JEBAdgJECGtYAiSEJDPz/v1xTsIQApmsM2Hez8djHnPmc875zPtMJp/3fD5nE1XFGGOMiQp1AMYYY8KDJQRjjDGAJQRjjDEuSwjGGGMASwjGGGNclhCMMcYAlhCMMca4LCEYY4wBLCEYY4xxRYc6gIZITU3V3r17hzoMY4xpU5YuXbpXVdPqW65NJYTevXuTl5cX6jCMMaZNEZHvg1nOhoyMMcYAlhCMMca4LCEYY4wBLCEYY4xxWUIwxhgDBJkQRGSWiOwRkVUnmS8i8qyIFIjIShEZFjDvVhHZ6D5uDSgfLiLfues8KyLS9M0xxhjTWMH2EF4Bck4xfzzQz31MBV4AEJFk4GHgPGAk8LCIdHbXeQG4LWC9U9VvjDGmhQV1HoKqfi4ivU+xyERgtjr34/xaRDqJSDpwMfCxqu4HEJGPgRwRWQJ0UNWv3fLZwCRgYSO3w5iT8vuVSp+fiio/FV4fFV4/Xr/i9fmp8ilev/vsc8v9il8VVcXvBwX3NahqzWt/9WsFxVnW786vLvdrXWXHPytOXbXVvrutU8upl6lLXbfJPbHu+t+/rhjqXqYNaUO3EL71gt6ktI9r0fdorhPTugPbAl4XuWWnKi+qo/wEIjIVp9dBz549mylcE858fuVQeRUl5VWUHKnkYHlVzaPkSBWHyqsoq/RxpNJLWYWP8irn+WiV09hXVD97nQRQ5Ws7//Sm9bWVweoJQ7q3mYTQYlR1JjATIDs72/6z27iyCi/f7zvCzoPl7Dx4lF0HjzrPh8rZe7iSfWUV7C+rrPMXc7V2MR4S46JJjPOQEBtNQqyHpPhouiTFER/jIS46itjoKOKiPcTFRBFXPR0dRVxMFDGeKGI8QnTUsedojxDjiSI6Soj2CFHiPEQgym0xAl9HidOQiAgCxy1fXR7lLiu4ywW8jhJBojg2LeDMOV4wjVXtZYKtp3ZRXbvx6nr7E96vrbSopl7NlRC2Az0CXme4Zdtxho0Cy5e45Rl1LG9OAyVHKtlUXMbW/WUU7j3C1v1H+H5fGVv3H2FvaeVxy3qihK5JcXTtGE/v1ASG9+5MamIsnRNj6ZQQQ8d2MXRsF+s+O4/YaDs4zpiW0FwJYT5wl4i8jbMD+aCq7hSRj4B/D9iRPA54QFX3i8ghERkFfAPcAvy1mWIxrcjr87Nu12GWbT3A8q0lLNt6gO/3HamZLwLpHeLpmZLA2P5d6ZWaQK/kRLp3bkd6x3hS28fhibJfmMaEg6ASgoi8hfNLP1VEinCOHIoBUNX/BBYAlwMFwBHgp+68/SLyKJDrVvVI9Q5m4E6co5fa4exMth3KbYCqsnFPKV8W7OXLgr18vXk/pRVeANKS4hjaoxPXj+hB/zOS6JmcSEbndsTHeEIctTEmGFLXEQjhKjs7W+1qp63vQFkl/1uwlyXr9/D5hr3sLa0AoHdKAqPPSuW8vikM69mJ7p3a2XiyMWFIRJaqanZ9y4X9TmUTGoV7y1i4ahcfrd7FiqISVKFTQgwX9kvjB2elcv6ZKfRITgh1mMaYZmQJwdTYfegoc3O3sWDVLtbuPARAVo9O/MvYflx4dhpZGZ1svN+Y05glhAinquQWHuDVrwr5aNUufKpk9+rMjCsGMD4zne6d2oU6RGNMK7GEEKGOVvl4b/l2Xv1nIet2HaZDfDQ/Hd2bm0f1oldKYqjDM8aEgCWECOP1+XlnWRFPf7KRnQeP0v+MJJ6YnMnEId1pF2tHAxkTySwhRAhVZeGqXTy5aD2bi8vI6tGJP1+bxeizUuzIIGMMYAkhIqzecZDfzV9NbuEB+nVpz3/9ZDjjBna1RGCMOY4lhNPY5uJS/vSP9Sxas4tOCbE8PjmT67J72JFCxpg6WUI4DZVVePnrZwX87YvNxEV7uOPiM5l64Zl0bBcT6tCMMWHMEsJp5uvN+/j1nHx2HDzKtcMz+G3OOXRJig91WMaYNsASwmnC6/Pz1CcbeH7JJnqnJPLOHeczvFdyqMMyxrQhlhBOA1v3HeE381bw7Zb9XJedwcNXDSIxzv60xpiGsVajjftkzW7unZsPwJP/J4trh2fUs4YxxtTNEkIbpar87YstPLZgLYO6deCFm4bbxeaMMU1iCaENOlrl46H3VzE3r4jxg8/gqeuH2D0HjDFNZgmhjdlbWsFPX87lu+0HufuHZ3Hvj84mys4rMMY0A0sIbUhe4X5+PXcFew4f5cVbsrl0YNdQh2SMOY3Y3crbiC827uXHL32Dosz+2XmWDIwxzc56CG3A15v38YvZufRNTeTN20aRnBgb6pCMMaehoHoIIpIjIutFpEBEptcxv5eIfCoiK0VkiYhkuOWXiEh+wOOoiExy570iIlsC5g1p3k07PSz9fj8/eyWX7p3a8fovzrNkYIxpMfX2EETEAzwHXAoUAbkiMl9V1wQs9iQwW1VfFZEfAo8DP1HVxcAQt55koABYFLDeNFWd1zybcvpZWVTClFm5dEmK483bRpHaPi7UIRljTmPB9BBGAgWqullVK4G3gYm1lhkIfOZOL65jPsC1wEJVPdLYYCPJjpJyfvZKHh0TYnjztlF07WDXIzLGtKxgEkJ3YFvA6yK3LNAKYLI7fTWQJCIptZa5AXirVtlj7jDTUyJiP39dR6t83PXmMo5W+Xh5ygi62X2NjTGtoLmOMvoNcJGILAcuArYDvuqZIpIOZAIfBazzANAfGAEkA/fXVbGITBWRPBHJKy4ubqZww9fRKh+3v76UZVtL+OM159Kva1KoQzLGRIhgEsJ2oEfA6wy3rIaq7lDVyao6FHjQLSsJWOQ64D1VrQpYZ6c6KoCXcYamTqCqM1U1W1Wz09LSgtqotsrnV+54fSlL1hfz+ORMrjg3PdQhGWMiSDAJIRfoJyJ9RCQWZ+hnfuACIpIqItV1PQDMqlXHjdQaLnJ7DYhzH8dJwKqGh396+Y/PCli8vphHJw7ixpE9Qx2OMSbC1JsQVNUL3IUz3LMWmKuqq0XkERGZ4C52MbBeRDYAXYHHqtcXkd44PYz/qVX1GyLyHfAdkAr8oUlb0sb9Y9UunvpkA5OHdufmUb1CHY4xJgKJqoY6hqBlZ2drXl5eqMNodmt3HuKaF/5Jv65JzJk6yi5UZ4xpViKyVFWz61vOLl0RYoeOVnHb7Dzax0Uz8yfDLRkYY0LGLl0RYo8vWMf2knLm3X6BnWtgjAkp6yGE0LvLinjr261MHdOX4b06hzocY0yEs4QQIlv3HeGh91czsncyv83pH+pwjDHGEkIoeH1+/nXOckTgL9dn4bEb3BhjwoDtQwiBF5ZsYtnWEp69cSgZne0+yMaY8GA9hFa2Yfdh/vpZAVeem86ErG6hDscYY2pYQmhF1Ret69AumoevGhTqcIwx5jg2ZNSKHv1gDRt2l/Lqz0aSlmQXdzXGhBfrIbSS95YX8cY3W/nlhX256OzT+yJ9xpi2yRJCK1i94yD3v/Mdo/om85vLzgl1OMYYUydLCC3M71cefG8VHeKjef6m4cR47CM3xoQna51a2LxlReRvK2H6+AEkJ8aGOhxjjDkpSwgt6OCRKv64cB3De3Vm8tDadx01xpjwYkcZtaCnPtnAgSOVvDphJFF2NrIxJsxZD6GFfFmwl1e/KuSm83oxuHvHUIdjjDH1soTQAlSVv3y8gW4d2/HgFQNCHY4xxgTFEkILeDt3G0u/P8DtF/W1G94YY9oMSwjNbG9pBX/8xzrO75vCj8+zeyMbY9oOSwjNyOdX7nh9KUcqfTx01UC7rLUxpk0JKiGISI6IrBeRAhGZXsf8XiLyqYisFJElIpIRMM8nIvnuY35AeR8R+catc46ItPmD9P/r803kFh7gicmZDEjvEOpwjDGmQepNCCLiAZ4DxgMDgRtFZGCtxZ4EZqvqucAjwOMB88pVdYj7mBBQ/kfgKVU9CzgA/LwJ2xFyq3cc5KmPN3BFZjpX2zkHxpg2KJgewkigQFU3q2ol8DYwsdYyA4HP3OnFdcw/jogI8ENgnlv0KjAp2KDDjd+v3Dd3BZ0TYvnDpME4m2eMMW1LMAmhO7At4HWRWxZoBTDZnb4aSBKRFPd1vIjkicjXIlLd6KcAJarqPUWdAIjIVHf9vOLi4iDCbX0fr93Nul2HmT6+P53t8hTGmDaquXYq/wa4SESWAxcB2wGfO6+XqmYDPwaeFpEzG1Kxqs5U1WxVzU5LC7/LRvv8yhML19GvS3uuPNfugGaMabuCSQjbgR4BrzPcshqqukNVJ6vqUOBBt6zEfd7uPm8GlgBDgX1AJxGJPlmdbcUHK3ewZW8Z9156NrHRdtCWMabtCqYFywX6uUcFxQI3APMDFxCRVBGprusBYJZb3llE4qqXAUYDa1RVcfY1XOuucyvwflM3prUV7i3jd/NXk9m9I5cNOiPU4RhjTJPUmxDccf67gI+AtcBcVV0tIo+ISPVRQxcD60VkA9AVeMwtHwDkicgKnATwhKqucefdD/xaRApw9in8rZm2qVX4/co9by9HgadvGGLnHBhj2rygrnaqqguABbXKHgqYnsexI4YCl/knkHmSOjfjHMHUJr2zrIiVRQd56voszkxrH+pwjDGmyWzQuxFKK7z86aP1DO3ZiYlZds6BMeb0YAmhEWZ9sYXiwxU8fNUgu8+BMea0YQmhgUorvLzxzff84KxUhvToFOpwjDGm2VhCaKDHF6xlz+EK7r20X6hDMcaYZmUJoQGWbz3AG99s5eej+zC8V3KowzHGmGZlCSFIqsqjH6whLSmOf7307FCHY4wxzc4SQpC+2ryPZVtL+PWlZ9M+LqijdY0xpk2xhBCk1776nsRYD5OG2GGmxpjTkyWEIHy7ZT8LV+1i6oVn0i7W7pFsjDk9WUKoh6ry7wvWckaHeKZe2DfU4RhjTIuxhFCPT9buIX9bCfde2s96B8aY05olhHo8t7iAXikJXDMso/6FjTGmDbOEcApb9paRv62E60f0INpjH5Ux5vRmrdwpPLFwLQmxHq6yO6EZYyKAJYSTWPr9fhat2c2tF/SmR3JCqMMxxpgWZwnhJOYt3U5CjIdfXXJWqEMxxphWYQmhDgV7DvPO0iJyBqfbWcnGmIhhCaEWn1/57byVJMR5mD6+f6jDMcaYVmMJoZZ/rNrFsq0lPHTlQNKS4kIdjjHGtJqgEoKI5IjIehEpEJHpdczvJSKfishKEVkiIhlu+RAR+UpEVrvzrg9Y5xUR2SIi+e5jSPNtVuPNW7qN9I7xTLRrFhljIky9CUFEPMBzwHhgIHCjiAystdiTwGxVPRd4BHjcLT8C3KKqg4Ac4GkRCbzN2DRVHeI+8pu4LU1WcqSSzzfuZeKQ7njs1pjGmAgTTA9hJFCgqptVtRJ4G5hYa5mBwGfu9OLq+aq6QVU3utM7gD1AWnME3hI+37gXn18ZN6hrqEMxxphWF0xC6A5sC3hd5JYFWgFMdqevBpJEJCVwAREZCcQCmwKKH3OHkp4SkZAP2H+2djedE2LIyrB7JRtjIk9z7VT+DXCRiCwHLgK2A77qmSKSDrwG/FRV/W7xA0B/YASQDNxfV8UiMlVE8kQkr7i4uJnCPdH6XYd5f8UOcgan23CRMSYiBZMQtgM9Al5nuGU1VHWHqk5W1aHAg25ZCYCIdAA+BB5U1a8D1tmpjgrgZZyhqROo6kxVzVbV7LS0lhttmrd0G9FRwm/G2e0xjTGRKZiEkAv0E5E+IhIL3ADMD1xARFJFpLquB4BZbnks8B7ODud5tdZJd58FmASsasqGNIXX5+e95Tu45JwupLQP+ciVMcaERL0JQVW9wF3AR8BaYK6qrhaRR0RkgrvYxcB6EdkAdAUec8uvAy4EptRxeOkbIvId8B2QCvyhuTaqoT5bt4e9pRVMtktcG2MimKhqqGMIWnZ2tubl5TVrnarKVf/xBYfKvXx630XE2GWujTGnGRFZqqrZ9S0X8a3fR6t3s2r7Ie4Z28+SgTEmokV8C/gfizfSNzWRSUPsngfGmMgW0Qlh3a5DrNp+iJtH9bI7ohljIl5Et4Ivfr6F9nHRTBpq1y0yxpiITQiVXj+L1uxi/OAzSE6MDXU4xhgTchGbEL7ctJfDR72Mzzwj1KEYY0xYiNiE8I/vdpEUF83os1JDHYoxxoSFiEwIpRVePli5g3GDziAu2hPqcIwxJixEZEJY+v0Byip9TBpqh5oaY0y1yEwIhfsRgSE97DLXxhhTLSITwsdr95DdqzNJ8TGhDsUYY8JGxCWEvaUVrN15iIvP6RLqUIwxJqxEXEL4dst+AM4/M6WeJY0xJrJEhzqA1vb15n0kxHrI7N4x1KEYYwyoQmUZ+L3HyqTWXRslCmISIaplf8NHZELI7p1sVzY1pw/VExuQ+ngroHQ3+KqcxibK4zyL+xwVDajTSPmqnOfqh68K/FVOHd6jzvLR7cBbDp44iE10Hp4YJ7amEHHqryiFisNQcQgqS53X6nfe31cJfh+ozynz+51pv8+J01dVx2fmD9iuKmfZOrezju2ua1m/1ykH57OMinY+yyj3M43yuPF4nXh9lU4dvipnmiA+p1/lQlrL3tExohLCvtIKNuwutWsXhZLPC5WHnV9EFaXOc+BrfxXEJTkNid/r/COpOv/A1Q+/z20AxZnvPQoHi5x/rKgYpzEA5x8xOhaqjjrrVZZB+QE4WgKVRyAmHmISnMZL1f3HxKnbEwvRcU4D53EPPqhudKqfq8qdRrF6HeRYAxYdf6wRqP6nr66/tpoG2T0n5tAOp/EL3Oaah89p8HwVznZ5jzrT4nHjjXE+g+Ma+SgnNp/bgFc35JEgKsb9+7kJsyZxCniinflR0e70KV7HtHO+l1ExzufqiQmY7yaAKPfvF/g9CZyO8jjfq+q/UfW0J+ZY3UBNcqhO9KpOWWLLn0QbUQmhev/BqL62/6BR/D4oKz7WwJTtgVL3UbbH+cVZXuI2lG5jVXUUqo44jdzRkhZsiMT5x/JVHmtYaxKD2yjGJkJ8J2jX2UkE5Qfg4HYnUQhO4w/Hfj36KtzGs+L4Bra68Y5NcP6p4dg/rarzvt6KWo1A7PENUw09luSqf+EmdYNOPY8ll9qPKLfxj453H3HHfnl6K52kWpM8A5JJdJzTsEXHQWx7SDrD2eaaJBeYdHzH3utkDWVMvLu+3+kdRMc7MVQecX7F1wyBNLD3ctzH48YTlwRxHdznJOdvWZ14PbHH/iaBf58WHl45HUVUQtiwuxSAgekdQhxJGKkohf2bnF+kZcVO435oBxzeCUf2O+WVpU5DX7rr+HHO2jxxkJDsNjrtnAYjOt5pgLsMdOZV/zPHtoe49s5zbHunLC7JaWwqDh/7x65umGoaQ/e5ugH2+52GNind6Q0EUnUadk9Mw4dUjIlAEZUQdh4sJ7V9HPExEXK5Cr/PadhLtsKetbBjudPgH9l37FFx6MT1omKgQzokpDqNdkIvOCMTOnRzGl5wGtv2aZDYBdp3dabjOoRXwytyYpIwxpxURCWE7SXldO8UH+owWs7BItj2LezMh6I8JwFUHTk2PyHVadQTUyG5DySkQPsukHwmtOsEiW4Dn5Bi3W1jIlBQCUFEcoBnAA/wkqo+UWt+L2AWkAbsB25W1SJ33q3ADHfRP6jqq275cOAVoB2wAPgX1aYeknBqO0rKObtrUku+Rcvy+6DkeyjeAAe2wIFCKNsL+wqccfB9BYA6Y6pnnAvDboG0c6BTLycBdO4TXr/gjTFhpd6EICIe4DngUqAIyBWR+aq6JmCxJ4HZqvqqiPwQeBz4iYgkAw8D2Ti7zpe66x4AXgBuA77BSQg5wMLm27TjqSo7So62rTOUq8qdX/nbl0HRt1DwqTOeXy22vfNrv1NP53Hu9XD2OEjr7+w4NMaYBgimhzASKFDVzQAi8jYwEQhMCAOBX7vTi4G/u9OXAR+r6n533Y+BHBFZAnRQ1a/d8tnAJFowIZQcqaK8yke3Tu1a6i2arvIIbPoUtn7tPHaucI4YAWecPvP/QPdhToOf3NcZ2rFf/MaYZhJMQugObAt4XQScV2uZFcBknGGlq4EkEUk5ybrd3UdRHeUtpnBfGQDdwykh+P2wPQ82fQaFX0BRrnNYpifOafjP/xX0OA96jLTG3xjT4pprp/JvgP8QkSnA58B2wNccFYvIVGAqQM+ePRtdzzfuOQjDeoX4ktd+P2z7Btb8HdbMh8M7AIH0cyH7Z3D2ZdDzfBvyMca0umASwnagR8DrDLeshqruwOkhICLtgWtUtUREtgMX11p3ibt+xqnqDKh7JjATIDs7u9E7nbfuP0JKYixdkkJ0lNHBIvjqOVj9nnMoqCcOzvoRDPo99LvUOVbfGGNCKJiEkAv0E5E+OI32DcCPAxcQkVRgv6r6gQdwjjgC+Aj4dxGpbu3GAQ+o6n4ROSQio3B2Kt8C/LXJW3MKZRVekuJDcJTtwSL46nn45gXnpKt+42DgJKcnEG8nyBljwke9LaSqekXkLpzG3QPMUtXVIvIIkKeq83F6AY+LiOIMGf3KXXe/iDyKk1QAHqnewQzcybHDThfSgjuUAUqPekmMa6WEoOoMCeW9DIX/65x+P/QncOE06NyrdWIwxpgGCqqFVNUFOIeGBpY9FDA9D5h3knVncazHEFieBwxuSLBNUVrRCgnB74cNC2HZa85zx55wwT0wfIpzHoAxxoSxiDlTubTCyxkdWnD/waEd8I8HnJ5BXEcY+zCM/pdjV0A0xpgwFzEJoaylegiqsPRlWPRvztnCYx92egWeiPlojTGniYhptVpkyOjgdlg0A1a/C30uhCufhpQzm/c9jDGmlURMQjhS6SMxthmHb9YvhPl3w9FDcPH/dXYY2wXhjDFtWMQkhEqvn9joZmiwVeHbmbDwfueS0Ld+AF36N71eY4wJsYhICH6/4vVr0xNC8Xr44F74/ks45wq49m/OzWCMMeY0EBEJodLnByDG04SEcKAQZuUACuP/7FxmwnYcG2NOIxHRolUnhLjG9hAOFMLLVzj3dr1tse04NsacliJiL2iVtwk9BF8VvH2Tcx+CW//bkoEx5rQVUT2EBu9DKNsL7/wCdq+CG96E9KwWiM4YY8JDZCQEt4cQ25AewuFdzj6DA4Vw6aPQ/4qWCc4YY8JERCSEquqdysH2EMr2wqsToHQPTPkQeo9uweiMMSY8RMQ+hIqG9BD2bYLZE52b2d8015KBMSZiREgPwbmvTmx0PbegrCh1kkFZMVz/BvT+QStEZ4wx4SEiEsKxfQj1XLrif/8/OLgNfvoP6HV+K0RmjDHhIyKGjKqCOcpo13fwz2ch60ZLBsaYiBQRCaGy5jyEkwwZ+arg73dAu2S47N9bMTJjjAkfETFkVLNT+WQ9hMWPOT2E616DhORWjMwYY8JHRPQQvP5TnKmc/xZ88ZRzm8uBE1o3MGOMCSORkRDco4yio2oNGVWWwScPQ8YI54J1xhgTwYJKCCKSIyLrRaRARKbXMb+niCwWkeUislJELnfLbxKR/ICHX0SGuPOWuHVWz+vSvJt2jNfvJIQTegj//CuU7oZxf4Do2JZ6e2OMaRPq3YcgIh7gOeBSoAjIFZH5qromYLEZwFxVfUFEBgILgN6q+gbwhltPJvB3Vc0PWO8mVc1rpm05Ka97lJEnsIewfwt8/mcYfA30HNXSIRhjTNgLpocwEihQ1c2qWgm8DUystYwCHdzpjsCOOuq50V231VW5PYTowKOM/uePEBUN4x4LRUjGGBN2gkkI3YFtAa+L3LJAvwNuFpEinN7B3XXUcz3wVq2yl93hon8TkTqPCRWRqSKSJyJ5xcXFQYR7Ip/bQ4iuvudxyVZYOQdG/AI6pDeqTmOMOd00107lG4FXVDUDuBx4TURq6haR84AjqroqYJ2bVDUTGOM+flJXxao6U1WzVTU7LS2tUcF5a/cQ1v43qB9G/LxR9RljzOkomISwHegR8DrDLQv0c2AugKp+BcQDqQHzb6BW70BVt7vPh4E3cYamWkTNTuWoKOcktPy3oGsmJPdtqbc0xpg2J5iEkAv0E5E+IhKL07jPr7XMVmAsgIgMwEkIxe7rKOA6AvYfiEi0iKS60zHAlcAqWshxO5XXfQC7v4ORv2iptzPGmDap3qOMVNUrIncBHwEeYJaqrhaRR4A8VZ0P3Ae8KCL34uxgnqKq6lZxIbBNVTcHVBsHfOQmAw/wCfBis21VLdVXO43xCGz+H4hNgiE3t9TbGWNMmxTUpStUdQHOzuLAsocCptcAdd44QFWXAKNqlZUBwxsYa6P5/IonShAR2PI/zj0OPBFx1Q5jjAlaRJypXOX3u8NFC2D/ZjjrR6EOyRhjwk5EJASvT4mJEtj2tXPuwfApoQ7JGGPCTkQkhOohI/Ztco4s8sSEOiRjjAk7EZEQqnx+5zpG+zdD8pmhDscYY8JSRCQEn1+JiVLn+kV27oExxtQpIhJClU85I6oEvOWQYgnBGGPqEhEJwev3c6a4J1ennBXaYIwxJkxFSEJQztStzosug0IbjDHGhKnISAg+Pxm6G+I6QvvGXSDPGGNOdxGREHx+JZEjEN8x1KEYY0zYioiE0D4umo6eSohLCnUoxhgTtiIiITx9w1Cyz4iGuPahDsUYY8JWRCQEACoOQ6wlBGOMOZnISQiVpTZkZIwxpxA5CaHisA0ZGWPMKUROQqgsc26MY4wxpk6RkxCqyiEmPtRRGGNM2IqMhOD3g78Koi0hGGPMyURGQvBVOM+e2NDGYYwxYSyohCAiOSKyXkQKRGR6HfN7ishiEVkuIitF5HK3vLeIlItIvvv4z4B1hovId26dz4qINN9m1eI96jxbD8EYY06q3oQgIh7gOWA8MBC4UUQG1lpsBjBXVYeU3e+CAAASc0lEQVQCNwDPB8zbpKpD3MftAeUvALcB/dxHTuM3ox7eSuc52noIxhhzMtFBLDMSKFDVzQAi8jYwEVgTsIwCHdzpjsCOU1UoIulAB1X92n09G5gELGxQ9MGyHoIxQamqqqKoqIijR4+GOhTTCPHx8WRkZBAT07jbBAeTELoD2wJeFwHn1Vrmd8AiEbkbSAR+FDCvj4gsBw4BM1T1f906i2rV2b1hoTeAr7qHYAnBmFMpKioiKSmJ3r1705KjuKb5qSr79u2jqKiIPn36NKqO5tqpfCPwiqpmAJcDr4lIFLAT6OkOJf0aeFNEOpyinhOIyFQRyRORvOLi4sZFV91DsJ3KxpzS0aNHSUlJsWTQBokIKSkpTerdBZMQtgM9Al5nuGWBfg7MBVDVr4B4IFVVK1R1n1u+FNgEnO2un1FPnbjrzVTVbFXNTktr5L0MvO5RRtZDMKZelgzarqb+7YJJCLlAPxHpIyKxODuN59daZisw1g1oAE5CKBaRNHenNCLSF2fn8WZV3QkcEpFR7tFFtwDvN2lLTqUmIVgPwZhwVlJSwvPPP1//grVcfvnllJSUtEBEkaXehKCqXuAu4CNgLc7RRKtF5BERmeAudh9wm4isAN4CpqiqAhcCK0UkH5gH3K6q+9117gReAgpweg4ts0MZbKeyMW3EyRKC1+s95XoLFiygU6dOLRVWxAhqH4KqLlDVs1X1TFV9zC17SFXnu9NrVHW0qma5h5cucsvfUdVBbtkwVf3vgDrzVHWwW+ddbgJpGTU7leNa7C2MMU03ffp0Nm3axJAhQxgxYgRjxoxhwoQJDBzoHOk+adIkhg8fzqBBg5g5c2bNer1792bv3r0UFhYyYMAAbrvtNgYNGsS4ceMoLy+v871OVlf79scugjlv3jymTJkCwO7du7n66qvJysoiKyuLf/7znyfU+bvf/Y5bb72VMWPG0KtXL959911++9vfkpmZSU5ODlVVVQAsXbqUiy66iOHDh3PZZZexc+dOAF588UVGjBhBVlYW11xzDUeOHAFgypQp3HPPPVxwwQX07duXefPmNeFTPrlgjjJq+7x2prIxDfX7/17Nmh2HmrXOgd068PBVg046/4knnmDVqlXk5+ezZMkSrrjiClatWlVz1MysWbNITk6mvLycESNGcM0115CSknJcHRs3buStt97ixRdf5LrrruOdd97h5ptvPuG9gqkr0D333MNFF13Ee++9h8/no7S0tM7lNm3axOLFi1mzZg3nn38+77zzDn/605+4+uqr+fDDD7niiiu4++67ef/990lLS2POnDk8+OCDzJo1i8mTJ3PbbbcBMGPGDP72t79x9913A7Bz506++OIL1q1bx4QJE7j22mtP/WE3QmQkBL/b3Yxq3LG5xpjQGDly5HGHUD777LO89957AGzbto2NGzee0Ij36dOHIUOGADB8+HAKCwvrrDuYugJ99tlnzJ49GwCPx0PHjnXfo338+PHExMSQmZmJz+cjJ8c55zYzM5PCwkLWr1/PqlWruPTSSwHw+Xykp6cDsGrVKmbMmEFJSQmlpaVcdtllNfVOmjSJqKgoBg4cyO7du08aZ1NERkLwOd00PJGxucY0h1P9km8tiYmJNdNLlizhk08+4auvviIhIYGLL764zkMs4+KODQ17PB7Ky8vZtm0bV111FQC33347/fv3P2ldgUfq1HcI53PPPceLL74IOPsxAt8/KiqKmJiYmvqioqLwer2oKoMGDeKrr746ob4pU6bw97//naysLF555RWWLFlS53a11Ah7ZFzczu8mBOshGBPWkpKSOHz4cJ3zDh48SOfOnUlISGDdunV8/fXXQdfbo0cP8vPzyc/P5/bbbz9lXV27dmXt2rX4/f6aHgTA2LFjeeGFFwDnV/3Bgwf51a9+VVNvt27dgorlnHPOobi4uCYhVFVVsXr1agAOHz5Meno6VVVVvPHGG0FvX3OJjIRQ00OwhGBMOEtJSWH06NEMHjyYadOmHTcvJycHr9fLgAEDmD59OqNGjWr0+5yqrieeeIIrr7ySCy64oGYoB+CZZ55h8eLFZGZmMnz4cNasWVNX1fWKjY1l3rx53H///WRlZTFkyJCaHdSPPvoo5513HqNHj6Z///6N3r7GkpY8uKe5ZWdna15eXsNX/GYmLJwG0zZD4snHCI2JdGvXrmXAgAGhDsM0QV1/QxFZqqrZ9a0bGT2EmiEjT2jjMMaYMBYZCcGGjIwxpl6RkRBsp7IxxtQrQhKCz3m2HoIxxpxUZCQEXxWIB+wqjsYYc1KRkRD8VRBlJ6UZY8ypREhC8NlwkTFtQGMvfw3w9NNP11wMzjROZCQEn/UQjGkLLCGEVmQkBH+V9RCMaQMCL389bdo0/vznPzNixAjOPfdcHn74YQDKysq44ooryMrKYvDgwcyZM4dnn32WHTt2cMkll3DJJZecUG9hYSFjxoxh2LBhDBs2rObM4CVLlnDllVfWLHfXXXfxyiuvAJCbm8sFF1xAVlYWI0eOrPOSGhdffDH33nsv2dnZDBgwgNzcXCZPnky/fv2YMWNGzXKvv/46I0eOZMiQIfzyl7/E53MOdLnjjjvIzs5m0KBBNdsHzuW8H374YYYNG0ZmZibr1q1r+ocbhMj42eyrskNOjWmohdNh13fNW+cZmTD+iZPODrz89aJFi5g3bx7ffvstqsqECRP4/PPPKS4uplu3bnz44YeAc42jjh078pe//IXFixeTmpp6Qr1dunTh448/Jj4+no0bN3LjjTdyqqseVFZWcv311zNnzhxGjBjBoUOHaNeuXZ3LxsbGkpeXxzPPPMPEiRNZunQpycnJnHnmmdx7773s2bOHOXPm8OWXXxITE8Odd97JG2+8wS233MJjjz1GcnIyPp+PsWPHsnLlSs4991wAUlNTWbZsGc8//zxPPvkkL730UkM+6UaJjITg99mQkTFtzKJFi1i0aBFDhw4FoLS0lI0bNzJmzBjuu+8+7r//fq688krGjBlTb11VVVXcdddd5Ofn4/F42LBhwymXX79+Penp6YwYMQKADh06nHTZCROcG0dmZmYyaNCgmusf9e3bl23btvHFF1+wdOnSmrrKy8vp0qULAHPnzmXmzJl4vV527tzJmjVrahLC5MmTAecS3u+++26929gcIqOV9FfZpa+NaahT/JJvDarKAw88wC9/+csT5i1btowFCxYwY8YMxo4dy0MPPXTc/Pfee4/f//73ALz00kt88MEHdO3alRUrVuD3+4mPd26nGx0djd/vr1mvvstd//SnP2X58uV069atzstdB16iOvBy17feeiuPP/74cXVt2bKFJ598ktzcXDp37syUKVOOe//qujweT723EG0ukbEPwYaMjGkTAi9/fdlllzFr1qyaO5Nt376dPXv2sGPHDhISErj55puZNm0ay5YtO2Hdq6++uuay1NnZ2Rw8eJD09HSioqJ47bXXasbwe/XqxZo1a6ioqKCkpIRPP/0UcC5RvXPnTnJzcwHnstRer5eXX36Z/Pz8mmQQjLFjxzJv3jz27NkDwP79+/n+++85dOgQiYmJdOzYkd27d7NwYcvdVj5YkfGz2e+1ncrGtAGBl78eP348P/7xjzn//PMB517Hr7/+OgUFBUybNq3mBjTV9yiYOnUqOTk5dOvWjcWLFx9X75133sk111zD7NmzycnJqbnxTo8ePbjuuusYPHgwffr0qRmeio2NZc6cOdx9992Ul5fTrl07Pvnkk+PutxysgQMH8oc//IFx48bh9/uJiYnhueeeY9SoUQwdOpT+/fvTo0cPRo8e3ZSPrlkEdflrEckBngE8wEuq+kSt+T2BV4FO7jLTVXWBiFwKPAHEApXANFX9zF1nCZAOVN8Be5yq7jlVHI2+/PWb18PhnfDLzxu+rjERxC5/3fY15fLX9fYQRMQDPAdcChQBuSIyX1UD7w4xA5irqi+IyEBgAdAb2Atcpao7RGQw8BHQPWC9m1S1ES18A/UYCRV134XJGGOMI5gho5FAgapuBhCRt4GJQGBCUKB6N3xHYAeAqi4PWGY10E5E4lS1oqmBN8iY+1r17Ywxpi0KZqdyd2BbwOsijv+VD/A74GYRKcLpHdxdRz3XAMtqJYOXRSRfRP5NpO4rz4nIVBHJE5G84uLiIMI1xhjTGM11lNGNwCuqmgFcDrwmIjV1i8gg4I9A4PFjN6lqJjDGffykropVdaaqZqtqdlpaWjOFa4w5mbZ0W11zvKb+7YJJCNuBHgGvM9yyQD8H5roBfQXEA6kAIpIBvAfcoqqbqldQ1e3u82HgTZyhKWNMCMXHx7Nv3z5LCm2QqrJv376acywaI5h9CLlAPxHpg5MIbgB+XGuZrcBY4BURGYCTEIpFpBPwIc5RR19WLywi0UAnVd0rIjHAlcAnjd4KY0yzyMjIoKioCBuebZvi4+PJyMho9Pr1JgRV9YrIXThHCHmAWaq6WkQeAfJUdT5wH/CiiNyLs4N5iqqqu95ZwEMiUn0q4TigDPjITQYenGTwYqO3whjTLGJiYujTp0+owzAhEtR5COGi0echGGNMBAv2PITIuHSFMcaYellCMMYYA7SxISMRKQa+b+TqqThnTocbi6thLK6GC9fYLK6GaUpcvVS13uP221RCaAoRyQtmDK21WVwNY3E1XLjGZnE1TGvEZUNGxhhjAEsIxhhjXJGUEGaGOoCTsLgaxuJquHCNzeJqmBaPK2L2IRhjjDm1SOohGGOMOYWISAgikiMi60WkQESmt/J7zxKRPSKyKqAsWUQ+FpGN7nNnt1xE5Fk3zpUiMqwF4+ohIotFZI2IrBaRfwmH2EQkXkS+FZEVbly/d8v7iMg37vvPEZFYtzzOfV3gzu/dEnG57+URkeUi8kG4xOS+X6GIfOdeSj7PLQuH71gnEZknIutEZK2InB/quETkHPdzqn4cEpF/DXVc7nvd637nV4nIW+7/Qut+x1T1tH7gXCtpE9AX51aeK4CBrfj+FwLDgFUBZX/CueAfwHTgj+705cBCQIBRwDctGFc6MMydTgI2AANDHZtbf3t3Ogb4xn2/ucANbvl/Ane403cC/+lO3wDMacHP7Nc4V+b9wH0d8pjc9ygEUmuVhcN37FXgF+50LM4tdkMeV0B8HmAX0CvUceHcY2YL0C7guzWltb9jLfqBh8MDOB/4KOD1A8ADrRxDb45PCOuBdHc6HVjvTv8XcGNdy7VCjO/j3CY1bGIDEoBlwHk4J+RE1/6b4lx08Xx3OtpdTloglgzgU+CHwAduAxHSmAJiK+TEhBDSvyPOnRO31N7uUMdVK5ZxwJfhEBfHbkSW7H5nPgAua+3vWCQMGQVzx7fW1lVVd7rTu4Cu7nRIYnW7m0Nxfo2HPDZ3aCYf2AN8jNPDK1FVbx3vXROXO/8gkNICYT0N/Bbwu69TwiCmagosEpGlIjLVLQv137EPUIxzV8TlIvKSiCSGQVyBbgDecqdDGpc694d5EudWAjtxvjNLaeXvWCQkhLCmTooP2aFeItIeeAf4V1U9FDgvVLGpqk9Vh+D8Kh8J9G/tGAKJyJXAHlVdGso4TuEHqjoMGA/8SkQuDJwZor9jNM5Q6QuqOhTnkvfH7b8L5XffHYufAPy/2vNCEZe7z2IiTiLtBiQCOa0ZA0RGQgjmjm+tbbeIpAO4z3vc8laNVZz7UbwDvKGq74ZTbACqWgIsxukqdxLnxkq137smLnd+R2BfM4cyGpggIoXA2zjDRs+EOKYaeuzug3tw7k44ktD/HYuAIlX9xn09DydBhDquauNx7vG+230d6rh+BGxR1WJVrQLexfnetep3LBISQs0d39xfBTcA80Mc03zgVnf6Vpzx++ryW9wjG0YBBwO6sc1KRAT4G7BWVf8SLrGJSJo4d9pDRNrh7NdYi5MYrj1JXNXxXgt85v7Cazaq+oCqZqhqb5zvz2eqelMoY6omIokiklQ9jTMuvooQ/x1VdRewTUTOcYvGAmtCHVeAGzk2XFT9/qGMayswSkQS3P/N6s+rdb9jLbnTJlweOEcKbMAZi36wld/7LZwxwSqcX00/xxnr+xTYiHO3uGR3WQGec+P8Dshuwbh+gNMtXgnku4/LQx0bcC6w3I1rFfCQW94X+BYowOnmx7nl8e7rAnd+3xb+e17MsaOMQh6TG8MK97G6+vsd6r+j+15DgDz3b/l3oHOYxJWI82u6Y0BZOMT1e2Cd+71/DYhr7e+YnalsjDEGiIwhI2OMMUGwhGCMMQawhGCMMcZlCcEYYwxgCcEYY4zLEoIxxhjAEoIxxhiXJQRjjDEA/P/6D2QtdvrSWgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "result[['train-auc-mean','test-auc-mean']].plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 训练+预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pipeline(dtrain,dtest,test_id,iteration,random_seed,gamma,max_depth,lambd,subsample,colsample_bytree,min_child_weight):\n",
    "    if max_depth==6:\n",
    "        num_boost_round = 400\n",
    "    elif max_depth==7:\n",
    "        num_boost_round = 350\n",
    "    elif max_depth==8:\n",
    "        num_boost_round = 300\n",
    "    \n",
    "    params={\n",
    "    'booster':'gbtree',\n",
    "    'objective': 'binary:logistic',\n",
    "    'early_stopping_rounds':100,\n",
    "    'scale_pos_weight': float(len(train_ay)-np.sum(train_ay.values))/float(np.sum(train_ay.values)),  # 负例样本除以正例样本\n",
    "    'eval_metric': 'auc',\n",
    "    'gamma':gamma,\n",
    "    'max_depth':max_depth,\n",
    "    'lambda':lambd,\n",
    "    'subsample':subsample,\n",
    "    'colsample_bytree':colsample_bytree,\n",
    "    'min_child_weight':min_child_weight, \n",
    "    'eta': 0.04,\n",
    "    'seed':random_seed,\n",
    "    'nthread':16\n",
    "        }\n",
    "    watchlist  = [(dtrain,'train_ax')]\n",
    "    model = xgb.train(params,dtrain,num_boost_round=num_boost_round,evals=watchlist)\n",
    "    model.save_model('./unlabel_model/xgb{0}.model'.format(iteration))\n",
    "    \n",
    "    #predict test set\n",
    "    test_y = model.predict(dtest)\n",
    "    test_result = pd.DataFrame(test_id,columns=[\"id\"])\n",
    "    test_result['score'] = test_y\n",
    "    test_result.to_csv(\"./unlabel_preds/xgb{0}.csv\".format(iteration),index=None,encoding='utf-8')\n",
    "    \n",
    "    #get feature score\n",
    "    feature_score = model.get_fscore()\n",
    "    feature_score = sorted(feature_score.items(), key=lambda x:x[1],reverse=True)\n",
    "    fs = []\n",
    "    for (key,value) in feature_score:\n",
    "        fs.append(\"{0},{1}\\n\".format(key,value))\n",
    "    \n",
    "    with open('./unlabel_featurescore/feature_score_{0}.csv'.format(iteration),'w') as f:\n",
    "        f.writelines(\"feature,score\\n\")\n",
    "        f.writelines(fs)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "random_seed = list(range(0,10000,25))\n",
    "gamma = [i/1000.0 for i in range(100,200,2)]\n",
    "max_depth = [6,7,8]\n",
    "lambd = list(range(200,400,2))\n",
    "subsample = [i/1000.0 for i in range(600,700,2)]\n",
    "colsample_bytree = [i/1000.0 for i in range(250,350,2)]\n",
    "min_child_weight = [i/1000.0 for i in range(200,300,2)]\n",
    "random.shuffle(random_seed)\n",
    "random.shuffle(gamma)\n",
    "random.shuffle(max_depth)\n",
    "random.shuffle(lambd)\n",
    "random.shuffle(subsample)\n",
    "random.shuffle(colsample_bytree)\n",
    "random.shuffle(min_child_weight)\n",
    "\n",
    "with open('params.pkl','wb') as f:\n",
    "    pickle.dump((random_seed,gamma,max_depth,lambd,subsample,colsample_bytree,min_child_weight),f)\n",
    "\n",
    "#to reproduce my result, uncomment following lines\n",
    "\"\"\"\n",
    "with open('params_for_reproducing.pkl','rb') as f:\n",
    "    random_seed,gamma,max_depth,lambd,subsample,colsample_bytree,min_child_weight = pickle.load(f)    \n",
    "\"\"\"\n",
    "\n",
    "for i in range(36):\n",
    "    pipeline(dtrain,dvalid,valid_id,i,random_seed[i],gamma[i],max_depth[i%3],lambd[i],subsample[i],colsample_bytree[i],min_child_weight[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
